from langchain_community.document_loaders import TextLoader,CSVLoader,JSONLoader

#文本加载
# loader = TextLoader("data/test.txt")
# documents = loader.load()
# print(documents)
# print(len(documents))
# print(documents[0].page_content[:10])  # print the first 10 characters
# print(documents[0].metadata) #输出{"source":"data/test.txt"}


#csv加载
# loader = CSVLoader("data/test.csv", csv_args={"delimiter": ","})
# loader = CSVLoader("data/test.csv", csv_args={"fieldnames": ["产品名称","销售数量"]}) #指定列名
# documents = loader.load()
# # 每行转换为一个Document, metadata包含行号
# print(len(documents))
# print(documents[0].metadata)  # output: {'source': 'data/test.csv', 'row': 0}
# print(documents[0].page_content)

# JSON loading: build one Document per element of the top-level "articles" array.
loader = JSONLoader(
    file_path="data/test.json",
    jq_schema=".articles[]",  # jq expression: iterate over every element of the articles array
    content_key="content",  # use each element's "content" field as the document text
)

docs = loader.load()
print(len(docs))
# An empty "articles" array yields no documents; guard the index so the
# script reports a count of 0 instead of crashing with IndexError.
if docs:
    print(docs[0])